# The CPU Dispatcher implementation.
#
# This script handles the CPU dispatcher and requires the Meson module
# 'features'.
#
# The CPU dispatcher script is responsible for three main tasks:
#
# 1. Defining the enabled baseline and dispatched features by parsing build
#    options or compiler arguments, including detection of native flags.
#
# 2. Specifying the baseline arguments and definitions across all sources.
#
# 3. Generating the main configuration file, which contains information about
#    the enabled features, along with a collection of C macros necessary for
#    runtime dispatching. For more details, see the template file
#    `main_config.h.in`.
#
# This script exposes the following variables:
#
# - `CPU_BASELINE`: A list of CPU feature objects obtained from
#                   `mod_features.new()`, representing the minimum CPU features
#                   specified by the build option `-Dcpu-baseline`.
#
# - `CPU_BASELINE_NAMES`: A list of enabled CPU feature names, representing the
#                         minimum CPU features specified by the build option
#                         `-Dcpu-baseline`.
#
# - `CPU_DISPATCH_NAMES`: A list of enabled CPU feature names, representing the
#                         additional CPU features that can be dispatched at
#                         runtime, specified by the build option
#                         `-Dcpu-dispatch`.
#
# - `CPU_FEATURES`: A dictionary containing all supported CPU feature objects.
#
# Additionally, this script exposes a set of variables that represent each
# supported feature to be used within the Meson function
# `mod_features.multi_targets()`.

# Prefix used by all macros and features definitions
CPU_CONF_PREFIX = 'NPY_'
# Name of the generated main configuration header
CPU_CONF_CONFIG = 'npy_cpu_dispatch_config.h'

# Resolve the requested baseline/dispatch strings (CPU_CONF_BASELINE and
# CPU_CONF_DISPATCH) from the build options.
if get_option('disable-optimization')
  # Optimizations are switched off entirely: tell the C/C++ sources through a
  # definition and request no features at all.
  add_project_arguments('-D' + CPU_CONF_PREFIX + 'DISABLE_OPTIMIZATION', language: ['c', 'cpp'])
  CPU_CONF_BASELINE = 'none'
  CPU_CONF_DISPATCH = 'none'
else
  # If the global C arguments already select a target architecture, the
  # token "detect" is appended to the baseline so the features implied by
  # those arguments become part of it.
  baseline_detect = false
  c_args = get_option('c_args')
  foreach arg : c_args
    # Match case-insensitively: the Intel flag is documented as `-xHost`
    # (and `/QxHost` for intel-cl), so a case-sensitive search for '-xhost'
    # would never see it. All patterns below are therefore lower case.
    arg_lower = arg.to_lower()
    foreach carch : ['-march', '-mcpu', '-xhost', '/qxhost']
      if arg_lower.contains(carch)
        message('Appending option "detect" to "cpu-baseline" due to detecting global architecture c_arg "' + arg + '"')
        baseline_detect = true
        # One match is enough; stop scanning the patterns.
        break
      endif
    endforeach
    if baseline_detect
      # Also stop scanning the remaining arguments so the message above is
      # only printed once.
      break
    endif
  endforeach
  # The minimal set of CPU features required at runtime.
  CPU_CONF_BASELINE = get_option('cpu-baseline')
  if baseline_detect
    CPU_CONF_BASELINE += '+detect'
  endif
  # The set of additional CPU features to be dispatched at runtime.
  CPU_CONF_DISPATCH = get_option('cpu-dispatch')
endif

# Initialize the CPU features. Each architecture subdirectory is expected to
# export its feature objects (e.g. 'SSE', 'AVX', etc. for x86) plus a
# dictionary (e.g. "X86_FEATURES") which maps each feature name to its object.
foreach arch_dir : ['x86', 'ppc64', 's390x', 'arm', 'riscv64']
  subdir(arch_dir)
endforeach

# Merge the per-architecture dictionaries into a single name -> object lookup
# table covering every supported feature.
CPU_FEATURES = (
  ARM_FEATURES
  + X86_FEATURES
  + PPC64_FEATURES
  + S390X_FEATURES
  + RV64_FEATURES
)

# Prepare the feature sets used while parsing the requested baseline
# (CPU_CONF_BASELINE) and dispatch (CPU_CONF_DISPATCH) features.
cpu_family = host_machine.cpu_family()

# Features selected by the build option token 'min'. Little-endian ppc64 is
# the only case that depends on more than the CPU family.
if cpu_family == 'ppc64' and host_machine.endian() == 'little'
  min_features = [VSX2]
else
  min_features = {
    'x86': [SSE2],
    'x86_64': [SSE3],
    'ppc64': [],
    's390x': [],
    'arm': [],
    'aarch64': [ASIMD],
    'riscv64': [],
    'wasm32': [],
  }.get(cpu_family, [])
endif

# Features selected by the build option tokens 'max', 'native' and 'detect':
# every feature known for the host CPU family, or nothing for unknown
# families.
max_features_dict = {
  'x86': X86_FEATURES,
  'x86_64': X86_FEATURES,
  'ppc64': PPC64_FEATURES,
  's390x': S390X_FEATURES,
  'arm': ARM_FEATURES,
  'aarch64': ARM_FEATURES,
  'riscv64': RV64_FEATURES,
  'wasm32': {},
}.get(cpu_family, {})

# Flatten the dictionary into a plain list of feature objects.
max_features = []
foreach known_name, known_obj : max_features_dict
  max_features += [known_obj]
endforeach

# Without any known feature there is nothing to enable, so both requests are
# reset to 'none'.
if max_features.length() == 0
  message('Disabling CPU feature detection due to unsupported architecture: "' + cpu_family + '"')
  CPU_CONF_BASELINE = 'none'
  CPU_CONF_DISPATCH = 'none'
endif

# Option strings to parse, keyed by the build option they come from.
parse_options = {
  'cpu-baseline': CPU_CONF_BASELINE,
  'cpu-dispatch': CPU_CONF_DISPATCH
}
# Parsed feature objects per option; stays empty when nothing is requested.
parse_result = {
  'cpu-baseline': [],
  'cpu-dispatch': []
}
mod_features = import('features')
foreach opt_name, conf : parse_options
  # There is no regex support, so the string is normalized by hand: commas
  # become separators and the operators '+' and '-' become standalone tokens.
  spaced = conf.replace(',', ' ').replace('+', ' + ').replace('-', ' - ')
  tokens = spaced.strip().to_upper().split()
  result = []
  ignored = []
  # Features are appended by default; a '-' operator switches to removal
  # until the next '+' operator.
  append = true
  foreach tok : tokens
    # Operators only change the mode applied to the tokens that follow.
    if tok == '+' or tok == '-'
      append = (tok == '+')
      continue
    endif
    if tok == 'NONE'
      continue
    endif

    # Resolve the token into the list of feature objects it stands for.
    if tok == 'NATIVE'
      # The native probe is computed once and reused for later occurrences.
      if not is_variable('cpu_native_features')
        compiler_id = meson.get_compiler('c').get_id()
        native_flags = {
          'intel': '-xHost',
          'intel-cl': '/QxHost',
          # FIXME: Add support for fcc(-mcpu=a64fx) compiler
        }.get(compiler_id, '-march=native')
        test_native = mod_features.test(
          max_features, anyfet: true,
          force_args: [native_flags, '-DDETECT_FEATURES']
        )
        if not test_native[0]
          error('Option "native" doesn\'t support compiler', compiler_id)
        endif
        cpu_native_features = []
        foreach fet_name : test_native[1].get('features')
          cpu_native_features += [CPU_FEATURES[fet_name]]
        endforeach
      endif
      accumulate = cpu_native_features
    elif tok == 'DETECT'
      # Same caching scheme as 'NATIVE', but the probe is forced to use the
      # global C arguments instead of a native flag.
      if not is_variable('cpu_detect_features')
        test_detect = mod_features.test(
          max_features, anyfet: true,
          force_args: ['-DDETECT_FEATURES'] + get_option('c_args')
        )
        cpu_detect_features = []
        foreach fet_name : test_detect[1].get('features')
          cpu_detect_features += [CPU_FEATURES[fet_name]]
        endforeach
      endif
      accumulate = cpu_detect_features
    elif tok == 'MIN'
      accumulate = min_features
    elif tok == 'MAX'
      accumulate = max_features
    elif tok in CPU_FEATURES
      tokobj = CPU_FEATURES[tok]
      # A known feature that does not belong to the host CPU family is
      # reported later rather than treated as an error.
      if tokobj not in max_features
        ignored += [tok]
        continue
      endif
      accumulate = [tokobj]
    else
      error('Invalid token "'+tok+'" within option --'+opt_name)
    endif

    if append
      # Add the resolved features, skipping the ones already collected.
      foreach fet : accumulate
        if fet not in result
          result += [fet]
        endif
      endforeach
    else
      # Keep only the collected features that are not being removed.
      remaining = []
      foreach fet : result
        if fet not in accumulate
          remaining += [fet]
        endif
      endforeach
      result = remaining
    endif # append
  endforeach # tok : tokens

  if ignored.length() > 0
    message(
      'During parsing ' + opt_name +
      ': The following CPU features were ignored due to platform ' +
      'incompatibility or lack of support:\n"' + ' '.join(ignored) + '"'
    )
  endif
  # Only overwrite the empty default when at least one feature was collected.
  if result.length() > 0
    parse_result += {opt_name: mod_features.implicit_c(result)}
  endif
endforeach # opt_name, conf : parse_options

# Test the baseline features and apply their flags and #definitions across
# all sources.
#
# Note that this test enables the maximum set of features supported by the
# platform with respect to the requested ones.
#
# For example, if the user requested the baseline `avx512_skx` and the
# compiler doesn't support it but still supports some of its implied
# features, then the maximum supported implied features (e.g., AVX2) are
# enabled instead. This is what passing `anyfet: true` to the test function
# achieves.
baseline = {}
if parse_result['cpu-baseline'].length() > 0
  baseline = mod_features.test(parse_result['cpu-baseline'], anyfet: true)[1]
  baseline_args = baseline['args']
  # Every definition reported by the test becomes a '-D<prefix>HAVE_<name>'
  # compiler argument.
  foreach define_name : baseline['defines']
    baseline_args += ['-D' + CPU_CONF_PREFIX + 'HAVE_' + define_name]
  endforeach
  add_project_arguments(baseline_args, language: ['c', 'cpp'])
endif

# The names of the baseline features, including their implied features.
CPU_BASELINE_NAMES = baseline.get('features', [])
# The matching feature objects, in the same order as the names.
CPU_BASELINE = []
foreach fet_name : CPU_BASELINE_NAMES
  CPU_BASELINE += [CPU_FEATURES[fet_name]]
endforeach

# Walk over all initialized features and disable every feature that is
# neither part of the requested dispatch features nor of the enabled
# baseline, so that it is not enabled by
# import('features').multi_targets
foreach fet_name, fet_obj : CPU_FEATURES
  if fet_obj not in parse_result['cpu-dispatch'] and fet_name not in CPU_BASELINE_NAMES
    fet_obj.update(disable: 'Not part of the requested features')
  endif
endforeach

# Collect the names of the requested dispatch features that are not already
# covered by the baseline and that pass the feature test.
CPU_DISPATCH_NAMES = []
foreach fet_obj : parse_result['cpu-dispatch']
  # Baseline features are always available, so they are never dispatched.
  if fet_obj.get('name') not in CPU_BASELINE_NAMES
    fet_test = mod_features.test(fet_obj)
    # The first element of the test result tells whether the feature is
    # supported.
    if fet_test[0]
      CPU_DISPATCH_NAMES += [fet_obj.get('name')]
    endif
  endif
endforeach
# Generate the main configuration header; see 'main_config.h.in' for more
# clarification.

# One macro line is emitted per enabled feature name; each line ends with a
# backslash.
macro_tpl = '@0@_CPU_EXPAND(EXEC_CB(@1@, __VA_ARGS__)) \\'

baseline_calls = []
foreach fet : CPU_BASELINE_NAMES
  baseline_calls += [macro_tpl.format(CPU_CONF_PREFIX, fet)]
endforeach

dispatch_calls = []
foreach fet : CPU_DISPATCH_NAMES
  dispatch_calls += [macro_tpl.format(CPU_CONF_PREFIX, fet)]
endforeach

# Values substituted into the template.
main_config = {
  'P': CPU_CONF_PREFIX,
  'WITH_CPU_BASELINE': ' '.join(CPU_BASELINE_NAMES),
  'WITH_CPU_BASELINE_N': CPU_BASELINE_NAMES.length(),
  'WITH_CPU_DISPATCH': ' '.join(CPU_DISPATCH_NAMES),
  'WITH_CPU_DISPATCH_N': CPU_DISPATCH_NAMES.length(),
  'WITH_CPU_BASELINE_CALL': '\n'.join(baseline_calls),
  'WITH_CPU_DISPATCH_CALL': '\n'.join(dispatch_calls),
}

configure_file(
  input : 'main_config.h.in',
  output : CPU_CONF_CONFIG,
  configuration : configuration_data(main_config)
)
# Add the directory of this script's build output to the include path of all
# C/C++ sources.
add_project_arguments(
  '-I' + meson.current_build_dir(),
  language: ['c', 'cpp']
)

# Print a summary of the requested and the actually enabled features.
summary_tpl = '''
CPU Optimization Options
  baseline:
    Requested : @0@
    Enabled   : @1@
  dispatch:
    Requested : @2@
    Enabled   : @3@
'''
message(
  summary_tpl.format(
    CPU_CONF_BASELINE, ' '.join(CPU_BASELINE_NAMES),
    CPU_CONF_DISPATCH, ' '.join(CPU_DISPATCH_NAMES)
  )
)
